* ---------------------------------------------------------------------------
* Session setup and folder globals used by every section of this do-file.
* ---------------------------------------------------------------------------
clear
clear matrix
clear mata
set more off
set varabbrev off
* The former "set mem 100000000" line was removed: memory is managed
* automatically since Stata 12 and the setting is ignored there. The collect
* and table commands used further down need a much newer Stata anyway.

* All folder globals use forward slashes, which Stata accepts on every
* platform; each folder global keeps its trailing separator.

* path to main folder for Data
global mystart  "Replication JPE/Data"

* Folder where we store other intermediate data files for analysis
global data "${mystart}/CPS/CPS Data Files/Intermediate Data Files/"

* Output folders (figures and tables currently share one folder)
global out_figures "${mystart}/Tables and Figures/"
global out_tables "${mystart}/Tables and Figures/"

* Folder where we store parameters for model
global data_model "${mystart}/Parameters for Model/"


*** Basic cleaning of the raw CPS extract
* Applies the sample restrictions, builds the education, marital-status,
* income, health, labor-force, family-type and race variables, exports
* Table 118 (citizenship) and saves the cleaned file CPS_minorities.dta,
* which every later section of this do-file loads.
qui {
use "${mystart}\CPS\CPS Data Files\Raw CPS Data files\Extract 2024.dta"
keep if sex==1

* Wages and employment refer to last year, so age is shifted back by one;
* the age reported at the interview is kept in age_actual.
gen age_actual=age
replace age=age-1

keep if age>=25 & age<65

*** keep adult civilians
keep if popstat==1
drop popstat

keep if relate==101 // keep hh head
rename asecwt perwt // ASEC is the March CPS with more variables.
drop if perwt<=0
* truncated to integers so that perwt can be used as a frequency weight below
replace perwt=int(perwt)

*** drop if attending high school or college full time or part time
drop if schlcoll>=1 & schlcoll<=4

* survey years kept; year itself is shifted back by one further below
keep if year>=2001 & year<=2013

* map the higrade codes into the educ coding used by the groups below
replace educ=73 if higrade==150
replace educ=110 if higrade>=151 & higrade<=181
replace educ=111 if higrade>=190 & higrade<=210

*** EDUCATION GROUPS ***
gen education_group=.
replace education_group=1 if educ<=72 & educ!=1 & educ!=.   /* less than high-school */
replace education_group=2 if educ==73              /* high-school degree */
replace education_group=3 if educ>=80 & educ<110  /* some college but no Bachelor's degree (includes those with associate's degree in vocational programs) */
replace education_group=4 if educ>=110 & educ<=125 & educ!=. /* has Bachelor's degree or more */
drop if educ==999 /* missing */

* flag equal to 1 for educ codes 123, 124 and 125, missing otherwise
gen education_group_ma_or_more=1 if educ==123 | educ==124 | educ==125

* college: 0 = high-school degree, 1 = Bachelor's or more, missing for the other groups
gen college=0 if education_group==2
replace college=1 if education_group==4

label define col 0 "High School" 1 "College"
label values college col
label variable college "College"

* college_alt: three-way split that keeps every education group
gen college_alt=1 if education_group==2 | education_group==1
replace college_alt=2 if education_group==3
replace college_alt=3 if education_group==4

label define ed_a 1 "High School or Less" 2 "Some College" 3 "College"
label values college_alt ed_a
label variable college_alt "Education"


* married: 1 if marst==1, 0 for marst codes 2 to 6, missing for any other code
gen married=0 if marst==3 | marst==4 | marst==5 | marst==6 | marst==2
replace married=1 if marst==1
label define lab_m 0 "Single" 1 "Married"
label values married lab_m
label variable married "Marital Status"

*** Adjust earnings by CPI
* http://www.bls.gov/cpi/cpid1402.pdf
* CPI is assigned by SURVEY year, i.e. before year is shifted back below.
* The list holds one value per survey year, 1962 through 2015, in order.
* NOTE(review): the values look like the CPI-U annual average of the calendar
* year before the survey year (the income reference year) - confirm against
* the BLS table before editing any entry.
local cpi_values ///
	29.9 30.2 30.6 31 31.5 32.4 33.4 34.8 /// survey years 1962-1969
	36.7 38.8 40.5 41.8 44.4 49.3 53.8 56.9 60.6 65.2 /// 1970-1979
	72.6 82.4 90.9 96.5 99.6 103.9 107.6 109.6 113.6 118.3 /// 1980-1989
	124 130.7 136.2 140.3 144.5 148.2 152.4 156.9 160.5 163 /// 1990-1999
	166.6 172.2 177.1 179.9 184 188.9 195.3 201.6 207.342 215.303 /// 2000-2009
	214.537 218.056 224.939 229.594 232.957 236.736

gen CPI=.
local cpi_year = 1962
foreach cpi_value of local cpi_values {
	replace CPI = `cpi_value' if year == `cpi_year'
	local ++cpi_year
}

* from here on year is the income reference year (survey year minus one)
replace year=year-1

* Rescale CPI so that 218.056 maps to 100. 218.056 is the entry assigned to
* survey year 2011 above, which is year 2010 after the shift, so all dollar
* amounts are expressed in 2010 dollars. (An earlier version of this comment
* said 2006 dollars, which does not match the constant used here.)
replace CPI = CPI/218.056*100


*INCWAGE indicates each respondent's total pre-tax wage and salary income--
*        that is, money received as an employee--for the previous calendar year.
* 999999 = NIU
* 999998 = missing
* NOTE(review): incint, fedtaxac, stataxac and taxinc are deflated without any
* recoding of special codes - confirm none is needed for this extract.
gen incint_CPI_adjusted = incint*100/ CPI

replace incwage=. if incwage==999999 | incwage==999998
gen  incwage_CPI_adjusted= incwage*100/ CPI
*INCTOT indicates each respondent's total pre-tax personal income or losses
*         from all sources for the previous calendar year.

gen Tax_liability = fedtaxac + stataxac
gen Tax_liability_CPI_adj=Tax_liability*100/ CPI

gen taxinc_CPI_adj=taxinc*100/ CPI

replace inctot=. if inctot==999999 | inctot==999998
gen inctotal_CPI_adjusted= inctot*100/CPI

* hours = weeks worked times usual weekly hours; code 999 is set to zero hours
replace uhrsworkly=0 if uhrsworkly==999
	gen hours=wkswork1*uhrsworkly // note this did not have "ly" at end in the ORIGINAL
	* missing whenever hours is zero (division by zero yields missing)
	gen incwage_hourly=incwage_CPI_adjusted/hours

*HEALTH STATUS
keep if health==1 | health==2 | health==3 | health==4 | health==5
* health_status after the three steps below:
*   1 if health is 3 or 4, or any of disabwrk, diffcare, diffphys, diffmob equals 2
*   2 if health is 1 or 2 and none of those four flags equals 2
*   0 if health is 5 and none of those four flags equals 2
* NOTE(review): the clause (disabwrk==2 & diffcare==2) in the first line is
* always overwritten by the second line, where disabwrk==2 alone is enough,
* and health==5 with a flag set ends up as 1 rather than 0. Confirm this
* ordering is intended before relying on health_status.
	gen health_status=0 if health==5 | (disabwrk==2 & diffcare==2)
	replace health_status=1 if health==4 | health==3 | (disabwrk==2 | diffcare==2 | diffphys==2 | diffmob==2)
	replace health_status=2 if (health==1 | health==2) & (disabwrk!=2 & diffcare!=2 & diffphys!=2 & diffmob!=2)


* labor_force, based on annual hours and inclugh:
*   1 = fewer than 520 hours
*   2 = 520 to 1500 hours (PT), inclugh not equal to 2
*   3 = 520 to 1500 hours (PT), inclugh equal to 2
*   4 = more than 1500 hours (FT), inclugh not equal to 2
*   5 = more than 1500 hours (FT), inclugh equal to 2
* It stays missing when hours is missing.
* NOTE(review): inclugh==2 presumably flags employer-provided health insurance - confirm.
gen labor_force=5 if hours>1500 & inclugh==2 & hours!=. // & hinsemp==2
replace labor_force=4 if hours>1500 & inclugh!=2  & hours!=. //hinsemp!=2
replace labor_force=3 if hours<=1500 & hours>=520 & inclugh==2 // & hinsemp==2
replace labor_force=2 if hours<=1500 & hours>=520 & inclugh!=2 // & hinsemp!=2
replace labor_force=1 if hours<520 & hours>=0

* diagnostics only: output is suppressed while this section runs quietly
tabstat hours if (labor_force==2 | labor_force==3 ) [fweight=perwt], stat(mean p50)
tabstat hours if (labor_force==4 | labor_force==5 ) [fweight=perwt], stat(mean p50)

		drop if education_group==.
*percentage of working age population by educ
		tab education_group if age>=25 & age<65 [fweight=perwt]
sort famid year

* Family type. Missing values compare as larger than any number in Stata, so
* nchild>0 alone would also be true for a missing nchild; the explicit guard
* keeps such rows out of the "w/ Children" groups.
gen Mar_kids = 1 if married==0 & nchild==0
replace Mar_kids = 2 if married==0 & nchild>0 & !missing(nchild)
replace Mar_kids = 3 if married==1 & nchild==0
replace Mar_kids = 4 if married==1 & nchild>0 & !missing(nchild)

label define mk 1 "Single, No Children" 2 "Single, w/ Children" 3 "Married, No Children" 4 "Married, w/ Children"
label values Mar_kids mk

** Keep HS or less
keep if college_alt==1

* Race recode. First pass: 1 for the listed race codes, then 2 for hispan
* between 1 and 900, which overrides the first assignment.
gen race_alt=1 if race==200 |  race==805 | race==806 | race==807
replace race_alt=2 if hispan>0 & hispan<901
tab race_alt
drop race
rename race_alt race

* Second pass: renumber to 1 = everyone else, 2 = Black, 3 = Hispanic.
* race is never missing after this step.
replace race=3 if race==2 // hispanics
replace race=2 if race==1
replace race=1 if race!=2 & race!=3 // whites, or non-hisp, non-black
label var race "Race"
label define lab_rac 1 "White" 2 "Black" 3 "Hispanic"
label values race lab_rac

* Table 118 - Citizenship
* NOTE(review): this table is computed without weights, unlike the other
* statistics in this file - confirm that is intended.
table () (race) if  age<65 & year>=2000 & year<=2013, stat(fvpercent citizen) nototals nformat(%5.1f)
collect title "Citizenship Status, CPS (ASEC)"
collect export "${out_tables}/CPS_Citizenship.tex", tableonly replace

* indicator for citizen code 5
gen non_citizen=0
replace non_citizen=1 if citizen==5

save "${data}\CPS_minorities.dta", replace
}


* Figure 29
* Average family size of married respondents by race and age, together with
* the cubic-in-age fitted profiles that are exported as family size
* parameters for the model.
*** FAMILY SIZE
qui {
clear
use "${data}\CPS_minorities.dta"
keep if age<65

* two spouses plus own children; stays missing for the unmarried
gen Fam_size=2 if married==1 & nchild!=.
replace Fam_size=Fam_size+nchild if married==1 & nchild>0
tab Fam_size

* one weighted mean per race and age cell
collapse (mean) Fam_size [aweight=perwt], by(race age)

twoway ///
	(line Fam_size age if race==1, yaxis(1) xaxis(1) lwidth(medthick) lcolor(edkblue)) ///
	(line Fam_size age if race==2, lwidth(medthick) lcolor(edkblue) lpattern(dash)) ///
	(line Fam_size age if race==3, lwidth(medthick) lcolor(cranberry) lpattern(dash)) ///
	, ///
	ylabel(2.0(0.5)5.00) ///
	xlabel(25(5)65) ///
	ytitle("Family Size") ///
	xtitle("Age") ///
	title("Average Family Size if Married, CPS", color(black)) ///
	legend(label(1 "White") label(2 "Black") label(3 "Hispanic")) ///
	legend(col(3) pos(6) region(lcolor(gs16))) ///
	plotregion(margin(r+7 l+5) style(none)) ///
	graphregion(icolor(white) fcolor(gs14) margin(none )) ///
	graphregion(color(white)) bgcolor(white)
graph export "${out_tables}\Fam_size_races_CPS.eps", replace


* Smooth each race profile with a cubic polynomial in age, fitted on the
* collapsed cells; y collects the fitted values of the three regressions.
gen age_sq=age^2
gen age_cub=age^3
gen y=.
forvalues grp = 1/3 {
	reg Fam_size age age_sq age_cub if race==`grp'
	predict fit_`grp' if race==`grp'
	replace y=fit_`grp' if race==`grp'
	drop fit_`grp'
}

drop if race==.
sort race age

* fitted profiles for all races, stored under the variable name equiv
preserve
keep race age y
rename y equiv
sort age race
save "${data}\Equiv_races.dta", replace
restore

* one text file per minority group holding only the fitted values, ordered by age
local dir2 "Blacks"
local dir3 "Hispanics"
forvalues grp = 2/3 {
	preserve
	keep if race==`grp'
	sort age
	keep y
	outsheet using "${data_model}/`dir`grp''/parameters_fam_size.txt", nolabel nonames replace
	restore
}

}


* Figure 30 - fraction married
* Weighted share of married respondents by race and age, drawn as one line per race.
qui{
clear
use "${data}\CPS_minorities.dta"
keep if age<65

* married is a 0/1 indicator, so its weighted mean is the fraction married
collapse (mean) married [aweight=perwt], by(race age)

twoway ///
	(line married age if race==1, yaxis(1) xaxis(1) lwidth(medthick) lcolor(edkblue)) ///
	(line married age if race==2, lwidth(medthick) lcolor(edkblue) lpattern(dash)) ///
	(line married age if race==3, lwidth(medthick) lcolor(cranberry) lpattern(dash)) ///
	, ///
	ylabel(0.0(0.1)1.00) ///
	xlabel(25(5)65) ///
	ytitle("Fraction married") ///
	xtitle("Age") ///
	legend(label(1 "White") label(2 "Black") label(3 "Hispanic")) ///
	legend(col(3) pos(6) region(lcolor(gs16))) ///
	plotregion(margin(r+7 l+5) style(none)) ///
	graphregion(icolor(white) fcolor(gs14) margin(none )) ///
	graphregion(color(white)) bgcolor(white)
graph export "${out_tables}\Married_races_CPS.eps", replace
}


* SAVE DATA FOR FIGURE 31
*** DISTRIBUTION OF EMPLOYMENT BY AGE
* For race groups 2 and 3, computes the weighted percentage of respondents in
* each labor-force state by age and saves it in wide form, one row per age and race.
qui{
clear
use "${data}\CPS_minorities.dta"
keep if race==2 | race==3
keep if age<65
* reshape wide cannot use a missing value of labor_force as a column suffix
drop if missing(labor_force)

* Weighted head counts per race, age and labor-force state.
* collapse's (count) statistic returns the physical number of observations
* under aweights, so the previous [aweight=perwt] produced unweighted shares.
* With fweights the count is the sum of the weights. perwt was truncated to
* integer values in the cleaning section, as fweights require.
collapse (count) number=serial [fweight=perwt], by(race age labor_force) fast
reshape wide number, i(age race) j(labor_force)

egen total=rowtotal(number1 number2 number3 number4 number5)
forvalues state = 1/5 {
	* A state with no observations in a cell is missing after the reshape.
	* rowtotal already counted it as zero, so its share is zero; this also
	* keeps the PT sum below from turning missing.
	replace number`state'=0 if missing(number`state')
	replace number`state'=number`state'/total*100
}
drop total

* Regroup the shares: number1 = under 520 hours, number2 = PT, number3 = FT
* without insurance, number4 = former state 5. number5 is left unchanged and
* therefore duplicates number4.
replace number2 = number2+  number3 // PT
replace number3 = number4 // FT, no ins
replace number4 = number5
sort age race
save "${data}\CPS LS Ins Races.dta", replace

}


* cons floor
* Saves data for Figure 32
* Builds a government-transfer indicator (TR) and transfer amounts, runs a
* few diagnostic tabulations, and saves the weighted mean of TR by race and
* age, and by race, marital status and age.
qui{
clear
use "${data}\CPS_minorities.dta"

keep if age<65

*  careful with the coding of these variables.
* Not-in-universe codes are set to zero income; note that the code for
* incdisab has one more digit than the others.
*INCCHILD
*999999 = N.I.U. (Not in Universe)
replace incchild=0 if incchild==999999
*INCSSI
* 999999 = N.I.U. (Not in Universe).
replace incssi=0 if incssi==999999
* INCDISAB
* 9999999 = N.I.U. (Not in Universe).
replace incdisab=0 if incdisab==9999999
* INCWKCOM
* 999999 = N.I.U. (Not in Universe).
replace incwkcom=0 if incwkcom==999999
*INCWELFR
* 999999 = N.I.U. (Not in Universe).
replace incwelfr=0 if incwelfr==999999



* measure that includes disability
* TR = 1 if hinscaid==2 or any of welfare, SSI or disability income is positive;
* TR = 0 if hinscaid==1 and all three incomes are zero; missing otherwise.
* NOTE(review): hinscaid==2 presumably means covered by Medicaid - confirm.
gen TR=0 if   hinscaid==1  & incwelfr==0 &  incssi==0 & incdisab==0
replace TR=1 if hinscaid==2 | (incwelfr>0 & incwelfr!=.) | (incssi>0 & incssi!=.) | (incdisab>0  & incdisab!=.)

* transfer amounts deflated by CPI
gen TR_value = (incwelfr+ incssi+ incdisab+ incchild	)	*100/ CPI
gen DI_value = (incssi+ incdisab)*100/ CPI
gen TR_value_non_DI = TR_value-DI_value
gen DI_ssdi = (incdisab)*100/ CPI


* DI indicator: 1 if SSI or disability income is positive and the deflated
* total exceeds 500; 0 if both are zero or the total is at most 500.
* The zero branch used a strict "<500", which left a total of exactly 500
* unclassified; "<=500" makes the two branches complementary.
gen DI_indic1 = 1 if ((incssi>0 & incssi!=.) | ( incdisab>0 & incdisab!=. )) & DI_value >500
replace DI_indic1 = 0 if  (incssi==0 & incdisab==0) | (DI_value <=500) //&   incwkcom==0

* diagnostics only: output is suppressed while this section runs quietly
table race if age>=30 & age<=55, stat(mean TR  )
table race  if age>=30 & age<=55, stat(percent labor_force)
tab labor_force if race==2 & age>=30 & age<=55 [aweight=perwt]
tab labor_force if race==3 & age>=30 & age<=55 [aweight=perwt]


*  - c_min for those in poor H. We do singles, and then for married we multiply by a factor
bysort race: tabstat TR_value [aweight=perwt] if  DI_value >500 & DI_value!=.  & married==0 //


bysort race: tabstat TR [aweight=perwt] if age>=30 & age<=55 // fraction of ppl getting any transfer.
bysort race: tabstat TR [aweight=perwt] if age>=30 & age<=55 & DI_indic1 == 0


label var DI_indic1 "DI"
label define lab_disab 0 "No DI" 1 "DI"
label values DI_indic1  lab_disab

label var TR "Any Gov Trans"
label define lab_tr 0 "No" 1 "Yes"
label values TR  lab_tr

* NOTE(review): the saved files stop at age 63 (age<64) while the rest of
* this file uses age<65 - confirm the tighter cutoff is intended.
keep if  age<64
drop if race==.


* weighted fraction receiving any transfer, by race and age
* (a reshape wide that used to follow the save was removed: its result was
* discarded by restore and never used)
preserve
		sort  race age
		collapse (mean)   TR [aweight=perwt], by( race age) fast
		gen data=0

		save "${data}\CPS TR all Races.dta", replace
restore


* same statistic, additionally split by marital status
		sort  race age
		collapse (mean)   TR [aweight=perwt], by( race married age) fast
		gen data=0
		save "${data}\CPS TR Married Races.dta", replace

}


* Table 123 - wage distribution
* Weighted percentiles of the CPI-adjusted hourly wage for race groups 2 and 3,
* among part-time and full-time workers aged 40 to 50.
qui{
clear
use "${data}\CPS_minorities.dta"
keep if inlist(race, 2, 3)
keep if age<65

* trim the tails: hourly wage below 3.5 and annual wage income above 300000
* (the second condition also removes rows with missing wage income)
drop if incwage_hourly<3.5
drop if incwage_CPI_adjusted>300000

* keep PT and FT, i.e. labor-force states 2 to 5
keep if inlist(labor_force, 2, 3, 4, 5)

keep if inrange(age, 40, 50)

collapse ///
	(p5)  p5=incwage_hourly ///
	(p25) p25=incwage_hourly ///
	(p50) p50=incwage_hourly ///
	(p75) p75=incwage_hourly ///
	(p90) p90=incwage_hourly ///
	(p95) p95=incwage_hourly ///
	(p99) p99=incwage_hourly ///
	[aweight=perwt], by(race)
gen data=0
save  "${data}\Wage_dist2_races.dta", replace
}



*** DISTRIBUTION OF EMPLOYMENT AT AGE 25 - target in calibration
* Weighted share of respondents aged 25 or 26 in each labor-force state, by race.
* The result is left in memory as a single column, number, holding fractions
* ordered by race and then by state; nothing is saved inside this section.
qui{
clear
use "${data}\CPS_minorities.dta"
keep if age==25 | age==26
keep if college_alt==1
* reshape wide cannot use a missing value of labor_force as a column suffix
drop if missing(labor_force)

* collapse's (count) statistic returns the physical number of observations
* under aweights, so the previous [aweight=perwt] produced unweighted shares.
* With fweights the count is the sum of the weights. perwt was truncated to
* integer values in the cleaning section, as fweights require.
collapse (count) number=serial [fweight=perwt], by( race labor_force) fast
reshape wide number, i(race) j(labor_force)

egen total=rowtotal(number1 number2 number3 number4 number5)
forvalues state = 1/5 {
	* a state with no observations is missing after the reshape; rowtotal
	* already counted it as zero, so its share is zero
	replace number`state'=0 if missing(number`state')
	* shares are stored directly as fractions of the race total
	replace number`state'=number`state'/total
}
drop total

reshape long number , i(race) j(emp)
sort race emp

drop race emp
}